# Names found in `abalone.names` information file.
cols = c("sex",
"length",
"diam",
"height",
"weight_whole",
"weight_shucked",
"weight_viscera",
"weight_shell",
"rings")
# Read the data from `abalone.data` file.
data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data"
raw_data = readr::read_csv(data_url, cols)
# Correct the automatic variable typing
data = raw_data %>% mutate(sex = factor(sex, c("M", "F", "I")))
p1 = ggplot(data, aes(x = length, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "length", y = "rings") +
geom_smooth(method = "lm", se = FALSE)
p2 = ggplot(data, aes(x = diam, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "diam", y = "rings") +
geom_smooth(method = "lm", se = FALSE)
p3 = ggplot(data, aes(x = height, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "height", y = "rings") +
geom_smooth(method = "lm", se = FALSE)
p4 = ggplot(data, aes(x = weight_whole, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "weight_whole", y = "rings") +
geom_smooth(method = "lm", se = FALSE)
p5 = ggplot(data, aes(x = weight_shucked, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "weight_shucked", y = "rings") +
geom_smooth(method = "lm", se = FALSE)
p6 = ggplot(data, aes(x = weight_viscera, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "weight_viscera", y = "rings") +
geom_smooth(method = "lm", se = FALSE)
p7 = ggplot(data, aes(x = weight_shell, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "weight_shell", y = "rings") +
geom_smooth(method = "lm", se = FALSE)
grid.arrange(p1, p2, p3, p4, p5, p6, p7, nrow = 3)
lm1 = lm(rings ~ length,
data = data)
lm1
##
## Call:
## lm(formula = rings ~ length, data = data)
##
## Coefficients:
## (Intercept) length
## 2.102 14.946
data1 = data %>%
mutate(
resid1 = lm1$residuals,
fitted1 = lm1$fitted.values
)
q1 = ggplot(data1,
aes(x = length, y = resid1)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "length",
y = "Residual") +
geom_hline(yintercept = 0, col = "red") +
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p1, q1, nrow = 1)
lm2 = lm(rings ~ diam,
data = data)
lm2
##
## Call:
## lm(formula = rings ~ diam, data = data)
##
## Coefficients:
## (Intercept) diam
## 2.319 18.670
data2 = data %>%
mutate(
resid2 = lm2$residuals,
fitted2 = lm2$fitted.values
)
q2 = ggplot(data2,
aes(x = diam, y = resid2)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "diam",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p2, q2, nrow = 1)
data = data %>% filter(height < 0.5)
lm3 = lm(rings ~ height,
data = data)
lm3
##
## Call:
## lm(formula = rings ~ height, data = data)
##
## Coefficients:
## (Intercept) height
## 2.825 51.078
data3 = data %>%
mutate(
resid3 = lm3$residuals,
fitted3 = lm3$fitted.values
)
q3 = ggplot(data3,
aes(x = height, y = resid3)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "height",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p3, q3, nrow = 1)
lm4 = lm(rings ~ weight_whole,
data = data)
lm4
##
## Call:
## lm(formula = rings ~ weight_whole, data = data)
##
## Coefficients:
## (Intercept) weight_whole
## 6.985 3.559
data4 = data %>%
mutate(
resid4 = lm4$residuals,
fitted4 = lm4$fitted.values
)
q4 = ggplot(data4,
aes(x = weight_whole, y = resid4)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "weight_whole",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p4, q4, nrow = 1)
lm5 = lm(rings ~ weight_shucked,
data = data)
lm5
##
## Call:
## lm(formula = rings ~ weight_shucked, data = data)
##
## Coefficients:
## (Intercept) weight_shucked
## 7.732 6.130
fitted5 = 7.737 + 6.114 * data$weight_shucked
resid5 = data$rings - fitted5
data5 = data %>%
mutate(
resid5 = lm5$residuals,
fitted5 = lm5$fitted.values
)
q5 = ggplot(data5,
aes(x = weight_shucked, y = resid5)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "weight_shucked",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p5, q5, nrow = 1)
lm6 = lm(rings ~ weight_viscera,
data = data)
lm6
##
## Call:
## lm(formula = rings ~ weight_viscera, data = data)
##
## Coefficients:
## (Intercept) weight_viscera
## 7.254 14.845
data6 = data %>%
mutate(
resid6 = lm6$residuals,
fitted6 = lm6$fitted.values
)
q6 = ggplot(data6,
aes(x = weight_viscera, y = resid6)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "weight_viscera",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p6, q6, nrow = 1)
lm7 = lm(rings ~ weight_shell,
data = data)
lm7
##
## Call:
## lm(formula = rings ~ weight_shell, data = data)
##
## Coefficients:
## (Intercept) weight_shell
## 6.46 14.55
fitted7 = 6.462 + 14.536 * data$weight_shell
resid7 = data$rings - fitted7
data7 = data %>%
mutate(
resid7 = lm7$residuals,
fitted7 = lm7$fitted.values
)
q7 = ggplot(data7,
aes(x = weight_shell, y = resid7)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "weight_shell",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p7, q7, nrow = 1)
library(ggfortify)
autoplot(lm1, which = 1:2)
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
autoplot(lm2, which = 1:2)
autoplot(lm3, which = 1:2)
autoplot(lm4, which = 1:2)
autoplot(lm5, which = 1:2)
autoplot(lm6, which = 1:2)
autoplot(lm7, which = 1:2)
# install.packages("GGally")
library(GGally)
my_fn <- function(data, mapping, ...){
p <- ggplot(data = data, mapping = mapping) +
geom_point() +
geom_smooth(method=loess, fill="red", color="red", se = FALSE)
# geom_smooth(method=lm, fill="blue", color="blue", se = FALSE) # this plots linear line
p
}
GGally::ggpairs(data, columns = 1:9, lower = list(continuous = my_fn)) + theme_bw(base_size = 10)
lmTotal = lm(rings ~ ., data)
summary(lmTotal)$coefficients %>% round(4)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.6226 0.2866 12.6401 0.0000
## sexF -0.0559 0.0828 -0.6742 0.5002
## sexI -0.8449 0.0953 -8.8683 0.0000
## length -1.1593 1.8007 -0.6438 0.5198
## diam 9.2492 2.2270 4.1532 0.0000
## height 23.2520 2.2763 10.2149 0.0000
## weight_whole 8.8537 0.7210 12.2791 0.0000
## weight_shucked -19.3888 0.8142 -23.8131 0.0000
## weight_viscera -11.1209 1.2878 -8.6358 0.0000
## weight_shell 7.6898 1.1267 6.8249 0.0000
lmTotal
##
## Call:
## lm(formula = rings ~ ., data = data)
##
## Coefficients:
## (Intercept) sexF sexI length diam
## 3.62262 -0.05586 -0.84490 -1.15926 9.24922
## height weight_whole weight_shucked weight_viscera weight_shell
## 23.25204 8.85371 -19.38877 -11.12089 7.68979
# remotes::install_github("datalorax/equatiomatic")
library(equatiomatic)
extract_eq(lmTotal, use_coefs = TRUE)
## $$
## \operatorname{rings} = 3.62 - 0.06(\operatorname{sex}_{\operatorname{F}}) - 0.84(\operatorname{sex}_{\operatorname{I}}) - 1.16(\operatorname{length}) + 9.25(\operatorname{diam}) + 23.25(\operatorname{height}) + 8.85(\operatorname{weight\_whole}) - 19.39(\operatorname{weight\_shucked}) - 11.12(\operatorname{weight\_viscera}) + 7.69(\operatorname{weight\_shell}) + \epsilon
## $$
data = data %>%
mutate(lorings = log(rings))
p1 = ggplot(data, aes(x = length, y = lorings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "length", y = "lorings") +
geom_smooth(method = "lm", se = FALSE)
p2 = ggplot(data, aes(x = diam, y = lorings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "diam", y = "lorings") +
geom_smooth(method = "lm", se = FALSE)
p3 = ggplot(data, aes(x = height, y = lorings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "height", y = "lorings") +
geom_smooth(method = "lm", se = FALSE)
p4 = ggplot(data, aes(x = weight_whole, y = lorings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "weight_whole", y = "lorings") +
geom_smooth(method = "lm", se = FALSE)
p5 = ggplot(data, aes(x = weight_shucked, y = lorings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "weight_shucked", y = "lorings") +
geom_smooth(method = "lm", se = FALSE)
p6 = ggplot(data, aes(x = weight_viscera, y = lorings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "weight_viscera", y = "lorings") +
geom_smooth(method = "lm", se = FALSE)
p7 = ggplot(data, aes(x = weight_shell, y = lorings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "weight_shell", y = "lorings") +
geom_smooth(method = "lm", se = FALSE)
grid.arrange(p1, p2, p3, p4, p5, p6, p7, nrow = 3)
lm1 = lm(lorings ~ length,
data = data)
data1 = data %>%
mutate(
resid1 = lm1$residuals,
fitted1 = lm1$fitted.values
)
q1 = ggplot(data1,
aes(x = length, y = resid1)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "length",
y = "Residual") +
geom_hline(yintercept = 0, col = "red") +
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p1, q1, nrow = 1)
lm2 = lm(lorings ~ diam,
data = data)
data2 = data %>%
mutate(
resid2 = lm2$residuals,
fitted2 = lm2$fitted.values
)
q2 = ggplot(data2,
aes(x = diam, y = resid2)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "diam",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p2, q2, nrow = 1)
lm3 = lm(lorings ~ height,
data = data)
data3 = data %>%
mutate(
resid3 = lm3$residuals,
fitted3 = lm3$fitted.values
)
q3 = ggplot(data3,
aes(x = height, y = resid3)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "height",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p3, q3, nrow = 1)
lm4 = lm(lorings ~ weight_whole,
data = data)
data4 = data %>%
mutate(
resid4 = lm4$residuals,
fitted4 = lm4$fitted.values
)
q4 = ggplot(data4,
aes(x = weight_whole, y = resid4)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "weight_whole",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p4, q4, nrow = 1)
lm5 = lm(lorings ~ weight_shucked,
data = data)
data5 = data %>%
mutate(
resid5 = lm5$residuals,
fitted5 = lm5$fitted.values
)
q5 = ggplot(data5,
aes(x = weight_shucked, y = resid5)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "weight_shucked",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p5, q5, nrow = 1)
lm6 = lm(lorings ~ weight_viscera,
data = data)
data6 = data %>%
mutate(
resid6 = lm6$residuals,
fitted6 = lm6$fitted.values
)
q6 = ggplot(data6,
aes(x = weight_viscera, y = resid6)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "weight_viscera",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p6, q6, nrow = 1)
lm7 = lm(lorings ~ weight_shell,
data = data)
data7 = data %>%
mutate(
resid7 = lm7$residuals,
fitted7 = lm7$fitted.values
)
q7 = ggplot(data7,
aes(x = weight_shell, y = resid7)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "weight_shell",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p7, q7, nrow = 1)
library(ggfortify)
autoplot(lm1, which = 1:2)
# data2 %>% ggplot() +
# aes(sample = resid2) +
# geom_qq(size = 2) + geom_qq_line()
autoplot(lm2, which = 1:2)
# data3 %>% ggplot() +
# aes(sample = resid3) +
# geom_qq(size = 2) + geom_qq_line()
autoplot(lm3, which = 1:2)
# data4 %>% ggplot() +
# aes(sample = resid4) +
# geom_qq(size = 2) + geom_qq_line()
autoplot(lm4, which = 1:2)
# data5 %>% ggplot() +
# aes(sample = resid5) +
# geom_qq(size = 2) + geom_qq_line()
autoplot(lm5, which = 1:2)
# data6 %>% ggplot() +
# aes(sample = resid6) +
# geom_qq(size = 2) + geom_qq_line()
autoplot(lm6, which = 1:2)
autoplot(lm7, which = 1:2)
# install.packages("GGally")
library(GGally)
my_fn <- function(data, mapping, ...){
p <- ggplot(data = data, mapping = mapping) +
geom_point() +
geom_smooth(method=loess, fill="red", color="red", se = FALSE)
# geom_smooth(method=lm, fill="blue", color="blue", se = FALSE)
p
}
GGally::ggpairs(data, columns = 1:9, lower = list(continuous = my_fn)) + theme_bw(base_size = 10)
lmTotal = lm(rings ~ ., data)
summary(lmTotal)$coefficients %>% round(4)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.9368 0.1129 -88.0262 0.00
## sexF 0.0339 0.0258 1.3106 0.19
## sexI 0.1590 0.0301 5.2757 0.00
## length -5.8684 0.5621 -10.4410 0.00
## diam -3.3616 0.6974 -4.8199 0.00
## height -3.4500 0.7227 -4.7736 0.00
## weight_whole 2.7140 0.2270 11.9552 0.00
## weight_shucked -2.7349 0.2677 -10.2179 0.00
## weight_viscera -1.8789 0.4043 -4.6469 0.00
## weight_shell 2.6247 0.3523 7.4501 0.00
## lorings 10.3227 0.0525 196.6333 0.00
lmTotal
##
## Call:
## lm(formula = rings ~ ., data = data)
##
## Coefficients:
## (Intercept) sexF sexI length diam
## -9.93681 0.03386 0.15904 -5.86843 -3.36159
## height weight_whole weight_shucked weight_viscera weight_shell
## -3.44999 2.71398 -2.73492 -1.87887 2.62475
## lorings
## 10.32268
extract_eq(lmTotal, use_coefs = TRUE)
## $$
## \operatorname{rings} = -9.94 + 0.03(\operatorname{sex}_{\operatorname{F}}) + 0.16(\operatorname{sex}_{\operatorname{I}}) - 5.87(\operatorname{length}) - 3.36(\operatorname{diam}) - 3.45(\operatorname{height}) + 2.71(\operatorname{weight\_whole}) - 2.73(\operatorname{weight\_shucked}) - 1.88(\operatorname{weight\_viscera}) + 2.62(\operatorname{weight\_shell}) + 10.32(\operatorname{lorings}) + \epsilon
## $$
data = data %>%
mutate(length = log(length),
diam = log(diam),
height = log(height),
weight_whole = log(weight_whole),
weight_shucked = log(weight_shucked),
weight_viscera = log(weight_viscera),
weight_shell = log(weight_shell))
data = data %>%
filter(is.finite(height))
p1 = ggplot(data, aes(x = length, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "llength", y = "rings") +
geom_smooth(method = "lm", se = FALSE)
p2 = ggplot(data, aes(x = diam, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "ldiam", y = "rings") +
geom_smooth(method = "lm", se = FALSE)
p3 = ggplot(data, aes(x = height, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "lheight", y = "rings") +
geom_smooth(method = "lm", se = FALSE)
p4 = ggplot(data, aes(x = weight_whole, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "lweight_whole", y = "rings") +
geom_smooth(method = "lm", se = FALSE)
p5 = ggplot(data, aes(x = weight_shucked, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "lweight_shucked", y = "rings") +
geom_smooth(method = "lm", se = FALSE)
p6 = ggplot(data, aes(x = weight_viscera, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "lweight_viscera", y = "rings") +
geom_smooth(method = "lm", se = FALSE)
p7 = ggplot(data, aes(x = weight_shell, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "lweight_shell", y = "rings") +
geom_smooth(method = "lm", se = FALSE)
grid.arrange(p1, p2, p3, p4, p5, p6, p7, nrow = 3)
lm1 = lm(rings ~ length,
data = data)
lm1
##
## Call:
## lm(formula = rings ~ length, data = data)
##
## Coefficients:
## (Intercept) length
## 14.446 6.648
data1 = data %>%
mutate(
resid1 = lm1$residuals,
fitted1 = lm1$fitted.values
)
q1 = ggplot(data1,
aes(x = length, y = resid1)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "length",
y = "Residual") +
geom_hline(yintercept = 0, col = "red") +
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p1, q1, nrow = 1)
lm3 = lm(lorings ~ height,
data = data)
lm3
##
## Call:
## lm(formula = lorings ~ height, data = data)
##
## Coefficients:
## (Intercept) height
## 3.6369 0.6894
data3 = data %>%
mutate(
resid3 = lm3$residuals,
fitted3 = lm3$fitted.values
)
q3 = ggplot(data3,
aes(x = height, y = resid3)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "lheight",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p3, q3, nrow = 1)
lm4 = lm(lorings ~ weight_whole,
data = data)
lm4
##
## Call:
## lm(formula = lorings ~ weight_whole, data = data)
##
## Coefficients:
## (Intercept) weight_whole
## 2.3630 0.2661
data4 = data %>%
mutate(
resid4 = lm4$residuals,
fitted4 = lm4$fitted.values
)
q4 = ggplot(data4,
aes(x = weight_whole, y = resid4)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "lweight_whole",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p4, q4, nrow = 1)
lm5 = lm(lorings ~ weight_shucked,
data = data)
lm5
##
## Call:
## lm(formula = lorings ~ weight_shucked, data = data)
##
## Coefficients:
## (Intercept) weight_shucked
## 2.5471 0.2334
data5 = data %>%
mutate(
resid5 = lm5$residuals,
fitted5 = lm5$fitted.values
)
q5 = ggplot(data5,
aes(x = weight_shucked, y = resid5)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "lweight_shucked",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p5, q5, nrow = 1)
lm6 = lm(lorings ~ weight_viscera,
data = data)
lm6
##
## Call:
## lm(formula = lorings ~ weight_viscera, data = data)
##
## Coefficients:
## (Intercept) weight_viscera
## 2.7506 0.2558
data6 = data %>%
mutate(
resid6 = lm6$residuals,
fitted6 = lm6$fitted.values
)
q6 = ggplot(data6,
aes(x = weight_viscera, y = resid6)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "lweight_viscera",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p6, q6, nrow = 1)
lm7 = lm(lorings ~ weight_shell,
data = data)
lm7
##
## Call:
## lm(formula = lorings ~ weight_shell, data = data)
##
## Coefficients:
## (Intercept) weight_shell
## 2.7331 0.2914
data7 = data %>%
mutate(
resid7 = lm7$residuals,
fitted7 = lm7$fitted.values
)
q7 = ggplot(data7,
aes(x = weight_shell, y = resid7)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "lweight_shell",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p7, q7, nrow = 1)
library(ggfortify)
autoplot(lm1, which = 1:2)
autoplot(lm2, which = 1:2)
autoplot(lm3, which = 1:2)
autoplot(lm4, which = 1:2)
autoplot(lm5, which = 1:2)
autoplot(lm6, which = 1:2)
autoplot(lm7, which = 1:2)
# install.packages("GGally")
library(GGally)
my_fn <- function(data, mapping, ...){
p <- ggplot(data = data, mapping = mapping) +
geom_point() +
geom_smooth(method=loess, fill="red", color="red", se = FALSE)
# geom_smooth(method=lm, fill="blue", color="blue", se = FALSE)
p
}
GGally::ggpairs(data, columns = 1:9, lower = list(continuous = my_fn)) + theme_bw(base_size = 10)
lmTotal = lm(rings ~ ., data)
summary(lmTotal)$coefficients %>% round(4)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -18.0457 0.3243 -55.6473 0.0000
## sexF 0.0534 0.0272 1.9649 0.0495
## sexI 0.0501 0.0318 1.5730 0.1158
## length -1.5621 0.3139 -4.9758 0.0000
## diam -1.2938 0.2888 -4.4805 0.0000
## height -0.1962 0.1025 -1.9144 0.0556
## weight_whole 1.4840 0.1887 7.8625 0.0000
## weight_shucked -0.5929 0.0996 -5.9533 0.0000
## weight_viscera -0.4602 0.0717 -6.4205 0.0000
## weight_shell -0.0211 0.0987 -0.2135 0.8309
## lorings 10.7884 0.0581 185.5459 0.0000
lmTotal
##
## Call:
## lm(formula = rings ~ ., data = data)
##
## Coefficients:
## (Intercept) sexF sexI length diam
## -18.04574 0.05343 0.05007 -1.56208 -1.29377
## height weight_whole weight_shucked weight_viscera weight_shell
## -0.19619 1.48397 -0.59289 -0.46021 -0.02108
## lorings
## 10.78844
extract_eq(lmTotal, use_coefs = TRUE)
## $$
## \operatorname{rings} = -18.05 + 0.05(\operatorname{sex}_{\operatorname{F}}) + 0.05(\operatorname{sex}_{\operatorname{I}}) - 1.56(\operatorname{length}) - 1.29(\operatorname{diam}) - 0.2(\operatorname{height}) + 1.48(\operatorname{weight\_whole}) - 0.59(\operatorname{weight\_shucked}) - 0.46(\operatorname{weight\_viscera}) - 0.02(\operatorname{weight\_shell}) + 10.79(\operatorname{lorings}) + \epsilon
## $$
data = data %>%
mutate(rings = log(rings))
p1 = ggplot(data, aes(x = length, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "llength", y = "lorings") +
geom_smooth(method = "lm", se = FALSE)
p2 = ggplot(data, aes(x = diam, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "ldiam", y = "lorings") +
geom_smooth(method = "lm", se = FALSE)
p3 = ggplot(data, aes(x = height, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "lheight", y = "lorings") +
geom_smooth(method = "lm", se = FALSE)
p4 = ggplot(data, aes(x = weight_whole, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "lweight_whole", y = "lorings") +
geom_smooth(method = "lm", se = FALSE)
p5 = ggplot(data, aes(x = weight_shucked, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "lweight_shucked", y = "lorings") +
geom_smooth(method = "lm", se = FALSE)
p6 = ggplot(data, aes(x = weight_viscera, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "lweight_viscera", y = "lorings") +
geom_smooth(method = "lm", se = FALSE)
p7 = ggplot(data, aes(x = weight_shell, y = rings)) +
geom_point() + theme_classic(base_size = 5) +
labs(x = "lweight_shell", y = "lorings") +
geom_smooth(method = "lm", se = FALSE)
grid.arrange(p1, p2, p3, p4, p5, p6, p7, nrow = 3)
lm1 = lm(rings ~ length,
data = data)
lm1
##
## Call:
## lm(formula = rings ~ length, data = data)
##
## Coefficients:
## (Intercept) length
## 2.7872 0.7978
data1 = data %>%
mutate(
resid1 = lm1$residuals,
fitted1 = lm1$fitted.values
)
q1 = ggplot(data1,
aes(x = length, y = resid1)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "length",
y = "Residual") +
geom_hline(yintercept = 0, col = "red") +
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p1, q1, nrow = 1)
lm2 = lm(rings ~ diam,
data = data)
lm2
##
## Call:
## lm(formula = rings ~ diam, data = data)
##
## Coefficients:
## (Intercept) diam
## 2.9547 0.7592
data2 = data %>%
mutate(
resid2 = lm2$residuals,
fitted2 = lm2$fitted.values
)
q2 = ggplot(data2,
aes(x = diam, y = resid2)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "ldiam",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p2, q2, nrow = 1)
lm3 = lm(rings ~ height,
data = data)
lm3
##
## Call:
## lm(formula = rings ~ height, data = data)
##
## Coefficients:
## (Intercept) height
## 3.6369 0.6894
data3 = data %>%
mutate(
resid3 = lm3$residuals,
fitted3 = lm3$fitted.values
)
q3 = ggplot(data3,
aes(x = height, y = resid3)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "lheight",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p3, q3, nrow = 1)
lm4 = lm(rings ~ weight_whole,
data = data)
lm4
##
## Call:
## lm(formula = rings ~ weight_whole, data = data)
##
## Coefficients:
## (Intercept) weight_whole
## 2.3630 0.2661
data4 = data %>%
mutate(
resid4 = lm4$residuals,
fitted4 = lm4$fitted.values
)
q4 = ggplot(data4,
aes(x = weight_whole, y = resid4)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "lweight_whole",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p4, q4, nrow = 1)
lm5 = lm(rings ~ weight_shucked,
data = data)
lm5
##
## Call:
## lm(formula = rings ~ weight_shucked, data = data)
##
## Coefficients:
## (Intercept) weight_shucked
## 2.5471 0.2334
data5 = data %>%
mutate(
resid5 = lm5$residuals,
fitted5 = lm5$fitted.values
)
q5 = ggplot(data5,
aes(x = weight_shucked, y = resid5)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "lweight_shucked",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p5, q5, nrow = 1)
lm6 = lm(rings ~ weight_viscera,
data = data)
lm6
##
## Call:
## lm(formula = rings ~ weight_viscera, data = data)
##
## Coefficients:
## (Intercept) weight_viscera
## 2.7506 0.2558
data6 = data %>%
mutate(
resid6 = lm6$residuals,
fitted6 = lm6$fitted.values
)
q6 = ggplot(data6,
aes(x = weight_viscera, y = resid6)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "lweight_viscera",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p6, q6, nrow = 1)
lm7 = lm(rings ~ weight_shell,
data = data)
lm7
##
## Call:
## lm(formula = rings ~ weight_shell, data = data)
##
## Coefficients:
## (Intercept) weight_shell
## 2.7331 0.2914
data7 = data %>%
mutate(
resid7 = lm7$residuals,
fitted7 = lm7$fitted.values
)
q7 = ggplot(data7,
aes(x = weight_shell, y = resid7)) +
geom_point(size = 3) +
theme_classic(base_size = 5) +
labs(x = "lweight_shell",
y = "Residual") +
geom_hline(yintercept = 0, col = "red")+
geom_smooth(method = "loess", se = FALSE)
grid.arrange(p7, q7, nrow = 1)
# data1 %>% ggplot() +
# aes(sample = resid1) +
# geom_qq(size = 2) + geom_qq_line()
library(ggfortify)
autoplot(lm1, which = 1:2)
# data2 %>% ggplot() +
# aes(sample = resid2) +
# geom_qq(size = 2) + geom_qq_line()
autoplot(lm2, which = 1:2)
# data3 %>% ggplot() +
# aes(sample = resid3) +
# geom_qq(size = 2) + geom_qq_line()
autoplot(lm3, which = 1:2)
# data4 %>% ggplot() +
# aes(sample = resid4) +
# geom_qq(size = 2) + geom_qq_line()
autoplot(lm4, which = 1:2)
# data5 %>% ggplot() +
# aes(sample = resid5) +
# geom_qq(size = 2) + geom_qq_line()
autoplot(lm5, which = 1:2)
# data6 %>% ggplot() +
# aes(sample = resid6) +
# geom_qq(size = 2) + geom_qq_line()
autoplot(lm6, which = 1:2)
autoplot(lm7, which = 1:2)
# install.packages("GGally")
library(GGally)
my_fn <- function(data, mapping, ...){
p <- ggplot(data = data, mapping = mapping) +
geom_point() +
geom_smooth(method=loess, fill="red", color="red", se = FALSE)
# geom_smooth(method=lm, fill="blue", color="blue", se = FALSE)
p
}
GGally::ggpairs(data, columns = 1:9, lower = list(continuous = my_fn)) + theme_bw(base_size = 10)
lmTotal = lm(rings ~ ., data)
summary(lmTotal)$coefficients %>% round(4)
## Warning in summary.lm(lmTotal): essentially perfect fit: summary may be
## unreliable
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0 0 -2.717330e+01 0.0000
## sexF 0 0 -1.287000e-01 0.8976
## sexI 0 0 4.511900e+00 0.0000
## length 0 0 3.372000e+00 0.0008
## diam 0 0 -2.064300e+00 0.0391
## height 0 0 -4.413900e+00 0.0000
## weight_whole 0 0 -1.073100e+01 0.0000
## weight_shucked 0 0 2.127930e+01 0.0000
## weight_viscera 0 0 4.098300e+00 0.0000
## weight_shell 0 0 -1.371900e+01 0.0000
## lorings 1 0 3.550611e+16 0.0000
lmTotal
##
## Call:
## lm(formula = rings ~ ., data = data)
##
## Coefficients:
## (Intercept) sexF sexI length diam
## -4.268e-15 -1.695e-18 6.957e-17 5.128e-16 -2.887e-16
## height weight_whole weight_shucked weight_viscera weight_shell
## -2.191e-16 -9.811e-16 1.026e-15 1.423e-16 -6.561e-16
## lorings
## 1.000e+00
extract_eq(lmTotal, use_coefs = TRUE)
## Warning in summary.lm(x): essentially perfect fit: summary may be unreliable
## $$
## \operatorname{rings} = 0 + 0(\operatorname{sex}_{\operatorname{F}}) + 0(\operatorname{sex}_{\operatorname{I}}) + 0(\operatorname{length}) + 0(\operatorname{diam}) + 0(\operatorname{height}) + 0(\operatorname{weight\_whole}) + 0(\operatorname{weight\_shucked}) + 0(\operatorname{weight\_viscera}) + 0(\operatorname{weight\_shell}) + 1(\operatorname{lorings}) + \epsilon
## $$